Load required packages

library(tidyverse)
library(phyloseq)
library(speedyseq)
library(ggrepel)
library(ampvis2)
library(plotly)
library(microbiome)
library(here)
options(getClass.msg=FALSE) # https://github.com/epurdom/clusterExperiment/issues/66
#this fixes an error message that pops up because the class 'Annotated' is defined in two different packages

Source required functions

'%!in%' <- function(x,y)!('%in%'(x,y))

source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_taxa_tests.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_normalisation.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_alpha.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_beta.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_heatmap.R")
plot_time <- function(df, 
                      measure,
                      x = "Day_from_Inoculum", 
                      y = "value", 
                      shape = "neg",
                      fill = "Reactor_Treatment",
                      group = "Reactor_Treatment", 
                      point_size=0.5,
                      facet,
                      smooth = FALSE)
{
  df %>%
    dplyr::filter(alphadiversiy %in% measure) %>%
    dplyr::mutate(alphadiversiy = fct_reorder(alphadiversiy, value, .desc = TRUE)) %>%
    dplyr::mutate(neg = ifelse(value == 0, "neg", "pos")) %>%
    arrange(Day_from_Inoculum) %>%
    ggplot(aes_string(x = x,
                      y = y)) +
    geom_jitter(alpha=0.9, size = point_size, aes_string(color = fill, fill = fill, shape = shape),  show.legend = TRUE) + 
    geom_path(inherit.aes = TRUE, aes_string(fill = fill, color = fill, show.legend = FALSE),
              size = 0.01,
              linetype = "dashed") +
    facet_grid(as.formula(facet), scales = "free") +
    geom_vline(xintercept = c(0),
               color="black", alpha=0.4) + theme_light() -> plot
  
  if(smooth == TRUE) 
  {
    plot +
      geom_smooth(show.legend = FALSE, level = 0.95, alpha=0.005, size = 0.5 ,aes_string(color = fill, fill = fill))  -> plot
  }
  # scale_y_continuous(labels = scientific,
  #                    limits=c(1e+10, 1e+11), breaks = seq(1e+10, 1e+11, by = 1e+10),
  #                    trans = "log10") +
  
  
  return(plot + theme(legend.position = "bottom"))
}

Inspection of strains:

ps = "data/raw/metabarcoding/ps_silva_dada2_human_chicken_meta.RDS"

ps %>% 
  here::here() %>%
  readRDS() %>%
  phyloseq_get_strains_fast() %>% 
  filter_taxa(function(x) sum(x > 0) > 0, TRUE) %>% 
  prune_samples(sample_sums(.)>= 100, .) %>% # only keep samples with at least 100 seqs.
  subset_samples(Reactor %in% c("STRAIN", "negative", "positive"))  %>% 
  # subset_samples(Experiment == "CCUG59168" |  Experiment ==  "HV292.1" | Experiment ==  "Cecum" |  Experiment ==  "Continuous" & Reactor %in% c("IR1", "CR")) %>%
  # rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
  phyloseq_ampvis_heatmap(transform = "compositional",
                          group_by = "SampleID",
                          facet_by = c("Fermentation", "Enrichment", "Reactor", "Reactor_Treatment"),
                          tax_aggregate = "Species",
                          tax_add = NULL,
                          ntax  = 50) -> p
## Joining, by = "ASV"
p$data %>% 
  mutate(Abundance = na_if(Abundance, 0)) -> p$data 


p + facet_grid( ~ Reactor , scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 1, 10, 50, 75, 100), 
                       labels = c(0,  0.01, 1, 10, 50, 75, 100), 
                       trans = scales::pseudo_log_trans(sigma = 0.001),
                       na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2

# p2 %>% 
  # export::graph2ppt(append = TRUE,
  #                   file = file.path(here::here("data/processed/figures_NRP72")))

Import phyloseq object

ps = "data/raw/metabarcoding/ps_silva_dada2_human_chicken_meta.RDS"

ps %>% 
  here::here() %>%
  readRDS() %>%
  phyloseq_get_strains_fast() %>% 
  filter_taxa(function(x) sum(x > 0) > 0, TRUE) %>% 
  subset_samples(Enrichment == "NotEnriched") -> physeq
## Joining, by = "ASV"
physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 1110 taxa and 414 samples ]:
## sample_data() Sample Data:        [ 414 samples by 59 sample variables ]:
## tax_table()   Taxonomy Table:     [ 1110 taxa by 8 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 1110 tips and 1109 internal nodes ]:
## refseq()      DNAStringSet     :      [ 1110 reference sequences ]
## taxa are rows
physeq %>% 
  sample_data() %>% 
  data.frame() %>% 
  DT::datatable()

Rarefaction:

physeq %>% 
  rarefy_even_depth(sample.size = 4576,
                    rngseed = 123) -> physeq_rare
## `set.seed(123)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(123); .Random.seed` for the full vector
## ...
## 73 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## CR-40-S166CR-52-S196EMPTY-S384negativ-S80negative2-S181
## ...
## 351OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
physeq_rare
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 759 taxa and 341 samples ]:
## sample_data() Sample Data:        [ 341 samples by 59 sample variables ]:
## tax_table()   Taxonomy Table:     [ 759 taxa by 8 taxonomic ranks ]:
## phy_tree()    Phylogenetic Tree:  [ 759 tips and 758 internal nodes ]:
## refseq()      DNAStringSet     :      [ 759 reference sequences ]
## taxa are rows

heatmap:

physeq_rare %>%
  subset_samples(Treatment == "STRAIN" | Model == "Human" & Reactor %!in% c("negative", "positive"))  %>%
  # subset_samples(Experiment == "CCUG59168" |  Experiment ==  "HV292.1" | Experiment ==  "Cecum" |  Experiment ==  "Continuous" & Reactor %in% c("IR1", "CR")) %>%
  # rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
  phyloseq_ampvis_heatmap(transform = "compositional",
                          group_by = "SampleID",
                          facet_by = c("Fermentation", "Enrichment",  "Reactor_Treatment"),
                          tax_aggregate = "Species",
                          tax_add = NULL,
                          ntax  = 100) -> p

p$data %>% 
  mutate(Abundance = na_if(Abundance, 0)) -> p$data 


p + facet_grid( ~ Fermentation + Reactor_Treatment , scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 0.1, 0.2, 0.50, 0.75, 0.100), 
                       labels = c(0,  0.01, 0.1, 0.2, 0.50, 0.75, 0.100), 
                       trans = scales::pseudo_log_trans(sigma = 0.001),
                       na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2

# p2 %>% 
  # export::graph2ppt(append = TRUE,
  #                   file = file.path(here::here("data/processed/figures_NRP72")))

same at the genus level

physeq_rare %>%
  subset_samples(Treatment == "STRAIN" | Model == "Human" & Reactor %!in% c("negative", "positive"))  %>%
  # subset_samples(Experiment == "CCUG59168" |  Experiment ==  "HV292.1" | Experiment ==  "Cecum" |  Experiment ==  "Continuous" & Reactor %in% c("IR1", "CR")) %>%
  # rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
  phyloseq_ampvis_heatmap(transform = "compositional",
                          group_by = "SampleID",
                          facet_by = c("Fermentation", "Enrichment",  "Reactor_Treatment"),
                          tax_aggregate = "Genus",
                          tax_add = NULL,
                          ntax  = 50) -> p

p$data %>% 
  mutate(Abundance = na_if(Abundance, 0)) -> p$data 


p + facet_grid( ~ Fermentation + Reactor_Treatment , scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 0.1, 0.2, 0.50, 0.75, 0.100), 
                       labels = c(0,  0.01, 0.1, 0.2, 0.50, 0.75, 0.100), 
                       trans = scales::pseudo_log_trans(sigma = 0.001),
                       na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2

# p2 %>% 
  # export::graph2ppt(append = TRUE,
  #                   file = file.path(here::here("data/processed/figures_NRP72")))

same at the genus level

physeq_rare %>%
  subset_samples(Treatment == "STRAIN" | Experiment == "Continuous")  %>%
  # subset_samples(Experiment == "CCUG59168" |  Experiment ==  "HV292.1" | Experiment ==  "Cecum" |  Experiment ==  "Continuous" & Reactor %in% c("IR1", "CR")) %>%
  # rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
  phyloseq_ampvis_heatmap(transform = "compositional",
                          group_by = "SampleID",
                          facet_by = c("Model", "Fermentation", "Reactor_Treatment"),
                          tax_aggregate = "Genus",
                          tax_add = NULL,
                          ntax  = 50) -> p

p$data %>% 
  mutate(Abundance = na_if(Abundance, 0)) -> p$data 


p + facet_grid( ~Model + Fermentation + Reactor_Treatment , scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 0.1, 0.2, 0.50, 0.75, 0.100), 
                       labels = c(0,  0.01, 0.1, 0.2, 0.50, 0.75, 0.100), 
                       trans = scales::pseudo_log_trans(sigma = 0.001),
                       na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2

# p2 %>% 
  # export::graph2ppt(append = TRUE,
  #                   file = file.path(here::here("data/processed/figures_NRP72")))
physeq_rare %>%
  subset_samples(Treatment == "STRAIN" | Experiment == "Continuous")  %>%
  # subset_samples(Experiment == "CCUG59168" |  Experiment ==  "HV292.1" | Experiment ==  "Cecum" |  Experiment ==  "Continuous" & Reactor %in% c("IR1", "CR")) %>%
  # rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
  phyloseq_ampvis_heatmap(transform = "compositional",
                          group_by = "SampleID",
                          facet_by = c("Model", "Fermentation", "Reactor_Treatment"),
                          tax_aggregate = "Species",
                          tax_add = NULL,
                          ntax  = 200) -> p

p$data %>% 
  mutate(Abundance = na_if(Abundance, 0)) -> p$data 


p + facet_grid( ~Model + Fermentation + Reactor_Treatment , scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 0.1, 0.2, 0.50, 0.75, 0.100), 
                       labels = c(0,  0.01, 0.1, 0.2, 0.50, 0.75, 0.100), 
                       trans = scales::pseudo_log_trans(sigma = 0.001),
                       na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2

# p2 %>% 
  # export::graph2ppt(append = TRUE,
  #                   file = file.path(here::here("data/processed/figures_NRP72")))
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] reshape2_1.4.4       scales_1.1.1         here_1.0.1          
##  [4] microbiome_1.10.0    plotly_4.9.2.1       ampvis2_2.6.5       
##  [7] ggrepel_0.8.2        speedyseq_0.5.0      phyloseq_1.34.0     
## [10] forcats_0.5.0        stringr_1.4.0        dplyr_1.0.4         
## [13] purrr_0.3.4          readr_1.4.0          tidyr_1.1.2         
## [16] tibble_3.0.6         ggplot2_3.3.3        tidyverse_1.3.0.9000
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.15          colorspace_2.0-0    ellipsis_0.3.1     
##  [4] rprojroot_2.0.2     XVector_0.28.0      fs_1.5.0           
##  [7] rstudioapi_0.13     farver_2.0.3        ggnet_0.1.0        
## [10] DT_0.15             lubridate_1.7.9     xml2_1.3.2         
## [13] codetools_0.2-16    splines_4.0.2       doParallel_1.0.16  
## [16] knitr_1.31          ade4_1.7-16         jsonlite_1.7.2     
## [19] broom_0.7.2         cluster_2.1.0       dbplyr_1.4.4       
## [22] compiler_4.0.2      httr_1.4.2          backports_1.2.1    
## [25] assertthat_0.2.1    Matrix_1.2-18       lazyeval_0.2.2     
## [28] cli_2.3.0           htmltools_0.5.1.1   prettyunits_1.1.1  
## [31] tools_4.0.2         igraph_1.2.6        gtable_0.3.0       
## [34] glue_1.4.2          Rcpp_1.0.6          Biobase_2.50.0     
## [37] cellranger_1.1.0    vctrs_0.3.6         Biostrings_2.56.0  
## [40] multtest_2.44.0     ape_5.4-1           nlme_3.1-149       
## [43] iterators_1.0.13    crosstalk_1.1.0.1   xfun_0.21          
## [46] network_1.16.0      rvest_0.3.6         lifecycle_1.0.0    
## [49] zlibbioc_1.34.0     MASS_7.3-52         hms_1.0.0          
## [52] parallel_4.0.2      biomformat_1.7.0    rhdf5_2.32.4       
## [55] RColorBrewer_1.1-2  yaml_2.2.1          stringi_1.5.3      
## [58] highr_0.8           S4Vectors_0.26.1    foreach_1.5.1      
## [61] permute_0.9-5       BiocGenerics_0.34.0 rlang_0.4.10       
## [64] pkgconfig_2.0.3     evaluate_0.14       lattice_0.20-41    
## [67] Rhdf5lib_1.10.1     patchwork_1.1.0     htmlwidgets_1.5.3  
## [70] tidyselect_1.1.0    plyr_1.8.6          magrittr_2.0.1     
## [73] R6_2.5.0            IRanges_2.22.2      generics_0.1.0     
## [76] DBI_1.1.1           pillar_1.4.7        haven_2.3.1        
## [79] withr_2.4.1         mgcv_1.8-32         survival_3.2-3     
## [82] modelr_0.1.8        crayon_1.4.1        rmarkdown_2.4      
## [85] progress_1.2.2      grid_4.0.2          readxl_1.3.1       
## [88] data.table_1.13.6   blob_1.2.1          vegan_2.5-7        
## [91] reprex_0.3.0        digest_0.6.27       stats4_4.0.2       
## [94] munsell_0.5.0       viridisLite_0.3.0